####Welcome to Global Dominant Party Systems (GDPS) Dataset replication material####

#First step: install and load necessary packages
#every package this script installs or attaches, listed in one place
#ggsci, cowplot and gridExtra are attached below, so they are installed here as well
#quantmod, ff and R.matlab are kept from the original install list even though they are not attached below
gdps_packages <- c(
  "quantmod", "ff", "foreign", "R.matlab", "readr", "tidyverse", "ggpubr",
  "nnet", "stargazer", "ggplot2", "broom", "ggeffects", "knitr",
  "kableExtra", "gtsummary", "marginaleffects", "car", "ggsci", "cowplot",
  "gridExtra"
)

#install only what is missing, so re-running the script does not download everything again
missing_packages <- setdiff(gdps_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages, dependencies = TRUE)
}

#attach the packages used by the rest of the script (same order as before, duplicate removed)
library(readr)
library(tidyverse)
library(ggpubr)
library(nnet)
library(foreign)
library(stargazer)
library(ggplot2)
library(ggeffects)
library(broom)
library(knitr)
library(kableExtra)
library(gtsummary)
library(marginaleffects)
library(car)
library(ggsci)
library(cowplot)
library(gridExtra)


#Second step: load the dataset (gdps.csv) from your computer directory

#read the dataset; replace the placeholder below with the path to gdps.csv on your computer
#stringsAsFactors = FALSE keeps text columns as character on every R version (it is only the
#default from R 4.0 onwards), which the recoding steps below rely on when they assign new labels
data1 <- read.csv(
  "your file directory here",
  sep = ";",
  stringsAsFactors = FALSE
)

#Note: if there are any issues with loading the dataset, make sure to check if the separator is ";" or ",". This sometimes can be affected by World Region settings in your computer. Hence, if ";" does not work, then "," should.


####Re-coding the variables before producing the plots####

#First, let's create a new variable based on "region_UN" where we will bundle some sub-regions together, that is, move a level up on the list of regions and sub-regions proposed by the UN Statistics Division, which you can find at https://unstats.un.org/unsd/methodology/m49/
#This is done to simplify the graph and is in line with the literature which tends to analyze some sub-regions as part of the region such as "Sub-Saharan Africa"

#start from the UN sub-region and replace selected sub-regions with their parent region
data1$region_2 <- data1$region_UN

#sub-regions that are bundled under each broader region
#(the earlier "Western Africa" entry under Latin America was a copy-paste slip with no effect,
#because "Western Africa" is already bundled into Sub-Saharan Africa; it has been removed)
sub_saharan_africa <- c(
  "Eastern Africa", "Middle Africa", "Southern Africa", "Western Africa"
)
latin_america_caribbean <- c("Caribbean", "Central America", "South America")
oceania <- c(
  "Australia and New Zealand", "Melanesia", "Micronesia", "Polynesia"
)

#%in% never returns NA, so rows without a region are simply left untouched
#Sub-Saharan Africa
data1$region_2[data1$region_2 %in% sub_saharan_africa] <- "Sub-Saharan Africa"

#Latin America and the Caribbean
data1$region_2[data1$region_2 %in% latin_america_caribbean] <- "Latin America and the Caribbean"

#Oceania
data1$region_2[data1$region_2 %in% oceania] <- "Oceania"


#Now, let's shorten the names of the regions so that they fit more neatly in the ggplot graph
#For this, I created a separate variable to explain the process, it can of course be done in simpler ways

#region_3 holds the plot-friendly (shortened) version of region_2
data1$region_3 <- data1$region_2

#an empty region becomes missing; this is to fix for the absence of region for Taiwan
is.na(data1$region_3) <- which(data1$region_3 == "")

#long region name -> short label used on the plot axis
short_region_names <- c(
  "Western Europe" = "W. Europe",
  "Sub-Saharan Africa" = "Sub-Sah. Africa",
  "Southern Europe" = "S. Europe",
  "Northern Europe" = "N. Europe",
  "Northern America" = "N. America",
  "Northern Africa" = "N. Africa",
  "Latin America and the Caribbean" = "L. Am. & Carib.",
  "Eastern Europe" = "E. Europe"
)

#only rows whose region has a short label are rewritten; every other value is kept as it is
to_shorten <- data1$region_3 %in% names(short_region_names)
data1$region_3[to_shorten] <- unname(
  short_region_names[as.character(data1$region_3[to_shorten])]
)



#We need one more preparation before coding the ggplot plots - collapsing the "Assembly-Elected" and "Presidential" categories of political system into one. 
#The DPI2020 database suggests that in Assembly-Elected systems, it is the president that holds significant executive powers, hence this decision can make sense and is suitable for parsimonious data visualization.

#political system at the time of the first of the three consecutive victories (when the system
#was established), with "Assembly-Elected President" folded into "Presidential"
data1$firstvictory_polsys_dpi2 <- replace(
  data1$firstvictory_polsys_dpi,
  data1$firstvictory_polsys_dpi == "Assembly-Elected President",
  "Presidential"
)

#the same collapse for the political system at the time of the turnover election (marked with "t")
data1$t_polsys_dpi2 <- replace(
  data1$t_polsys_dpi,
  data1$t_polsys_dpi == "Assembly-Elected President",
  "Presidential"
)

#now that we have all the variables ready, we can move on to making the plots




####The four plots: Party system change outcomes, Executive dominance longevity, Number of cases per region, Trends of party system change over time####

#plot 1 - party system change outcomes per political system type, totals for a period 1900-2024
#first, let's do a little recoding: we don't need the cases with a 'pending' change in the graph, just the ones for which we know the outcome
#cases whose outcome is still "pending" are set to missing in the column itself
is.na(data1$partysystem_change) <- which(data1$partysystem_change == "pending")

#plot 1 is built step by step: data and mapping first, then layer, scales, labels and theme
#only rows with a known outcome are plotted; bars are filled by political system type
p1 <- ggplot(
  data = data1[!is.na(data1$partysystem_change), ],
  mapping = aes(x = partysystem_change, fill = t_polsys_dpi2)
)
p1 <- p1 + geom_bar()

#shades of black keep the plot readable for everyone
p1 <- p1 +
  scale_fill_manual(values = c("black", "grey")) +
  scale_y_continuous(limits = c(0, 80))

p1 <- p1 +
  labs(
    title = "Party system change outcomes (1900-2024)",
    x = "Outcome",
    y = "Cases",
    fill = "Political system type",
    caption = "Note: 'Assembly-Elected President' and 'Presidential' types collapsed"
  )

#theme_bw() has to come before theme() so that the tweaks below are not overwritten
p1 <- p1 +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "inches"),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text.x = element_text(size = 12),
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10)
  )

#plot 2 - now let's prepare the plot for the duration of uninterrupted executive dominance
#histogram of dominance duration in months, using only rows where the duration is recorded
p2 <- ggplot(
  data = data1[!is.na(data1$dominance_months), ],
  mapping = aes(x = dominance_months)
)

#10-month bins; x axis ticks every 60 months (5 years) up to 900
p2 <- p2 +
  geom_histogram(binwidth = 10, color = "black", fill = "lightgray") +
  scale_x_continuous(limits = c(0, 900), breaks = seq(0, 900, 60)) +
  scale_y_continuous(limits = c(0, 15))

p2 <- p2 +
  labs(
    title = "Longevity of dominant parties (1900-2024)",
    x = "Duration of executive dominance (in months)",
    y = "Cases",
    caption = "Note: data for cases with known start and end dates"
  )

#theme_bw() has to come before theme() so that the tweaks below are not overwritten
p2 <- p2 +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "inches"),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )


#plot 3 - this is the plot for the cases of dominant party systems per region and political system type
#bar chart of cases per (shortened) region, filled by the political system at first victory
#rows without a region are left out
p3 <- ggplot(
  data = data1[!is.na(data1$region_3), ],
  mapping = aes(x = region_3, fill = firstvictory_polsys_dpi2)
)

p3 <- p3 +
  geom_bar() +
  scale_fill_manual(values = c("black", "grey")) +
  scale_y_continuous(limits = c(0, 50))

p3 <- p3 +
  labs(
    title = "Dominant Party Systems per Region (1900-2024)",
    x = "Region",
    y = "Number of cases",
    fill = "Political system type",
    caption = "Note: 'Assembly-Elected President' and 'Presidential' types collapsed"
  )

#theme_bw() has to come before theme() so that the tweaks below are not overwritten
p3 <- p3 +
  theme_bw(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "inches"),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 10),
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10)
  )

#flip the axes so the region names are listed along the vertical axis
p3 <- p3 + coord_flip()

#plot4 - this is the local regression plot for presenting the trends of different outcomes of party system change over time
#you will see that I also added labels for each outcome to make the lines more readable

#making dummy variables for loess plot

#one 0/1 indicator column per outcome for the loess plot
#rows whose outcome is missing, or is anything other than the three outcomes below,
#are NA in all three indicators
change_outcome <- data1$partysystem_change
change_outcome[!(change_outcome %in% c("transformation", "alternating", "interrupted"))] <- NA

#comparing against NA gives NA, so out-of-scope rows stay missing; TRUE/FALSE become 1/0
data1$transformation <- as.numeric(change_outcome == "transformation")
data1$alternating <- as.numeric(change_outcome == "alternating")
data1$interrupted <- as.numeric(change_outcome == "interrupted")


#plot 4: loess-smoothed share of each outcome (0/1 indicator columns) against the turnover year
#columns are named directly inside aes() instead of through data1$..., which ggplot2 discourages
#because it bypasses the data given to the plot
p4 <- ggplot(data1, aes(x = t_year)) +
  geom_smooth(aes(y = transformation), method = "loess", color = "black", se = FALSE) +
  geom_smooth(aes(y = alternating), method = "loess", color = "gray", se = FALSE) +
  geom_smooth(aes(y = interrupted), method = "loess", color = "lightgray", se = FALSE) +
  scale_y_continuous(limits = c(0, 1)) +
  #the line labels are fixed annotations, so annotate() is used: each label is drawn once
  #instead of once per row of data1 (which is what geom_label() with constant text does)
  annotate(
    "label",
    label = "Transformation",
    x = 2000,
    y = 0.7,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.20,
    color = "black",
    fill = "white"
  ) +
  annotate(
    "label",
    label = "Alternating",
    x = 2000,
    y = 0.45,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.20,
    color = "black",
    fill = "white"
  ) +
  annotate(
    "label",
    label = "Interrupted",
    x = 2000,
    y = 0.2,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.20,
    color = "black",
    fill = "white"
  ) +
  theme_bw() +
  labs(
    title = "DPS Change Outcomes: Trends Over Time",
    caption = "Note: one case of collapse excluded from calculations",
    x = "Year",
    y = "Outcome likelihood"
  ) +
  theme(
    axis.title = element_text(size = 12),  # increase axis title size
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 15)),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    legend.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(),
    plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "inches")
  )

#let's now combine all four plots into one, just as it appears in the published paper
#for that, we will need cowplot, although other alternatives are also available if you want to try them out

#install.packages("cowplot")

#every panel has the same size, given as a fraction of the whole canvas
panel_width <- 0.4
panel_height <- 0.5

#2 x 2 layout: p1 and p3 on the top row, p2 and p4 on the bottom row
#panel letters A-D are placed at the top-left corner of each cell, row by row
ggdraw() +
  draw_plot(p1, x = 0, y = 0.5, width = panel_width, height = panel_height) +
  draw_plot(p3, x = 0.5, y = 0.5, width = panel_width, height = panel_height) +
  draw_plot(p2, x = 0, y = 0, width = panel_width, height = panel_height) +
  draw_plot(p4, x = 0.5, y = 0, width = panel_width, height = panel_height) +
  draw_plot_label(
    label = LETTERS[1:4],
    x = rep(c(0, 0.5), times = 2),
    y = rep(c(1, 0.5), each = 2),
    size = 12
  )

#and that's it - you should be able to observe all of the plots available in the published paper, based on the dataset.
#Thanks for using and reading the GDPS Dataset! Boris.